“Files Used:”
Z:/COVID-19_WastewaterAnalysis/data/processed/MMSD_Interceptor_Cases_2_7_22.csv
Z:/COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv
SizeUsed = 1
alphaUsed = .9
N1ShapeUnit = 4
N2ShapeUnit = 5
MadDF <- filter(FullDF,Site=="Madison")%>%
mutate(FlagedOutliersN1 = IdentifyOutliers(N1,Bin = 21, Action = "Flag"),
FlagedOutliersN2 = IdentifyOutliers(N2,Bin = 21, Action = "Flag"),
NoOutlierVarN1 = ifelse(FlagedOutliersN1, NA, N1),
NoOutlierVarN2 = ifelse(FlagedOutliersN2, NA, N2),)
NonOuliersDF <- MadDF%>%
mutate(Outlier = ifelse(FlagedOutliersN1,N1,NA))%>%
mutate(N1 = NoOutlierVarN1,
N2 = NoOutlierVarN2)
OutLierPlotDF <- MadDF%>%
mutate(OutlierN1 = ifelse(FlagedOutliersN1,N1,NA),
OutlierN2 = ifelse(FlagedOutliersN2,N2,NA))%>%
mutate(N1 = NoOutlierVarN1,
N2 = NoOutlierVarN2)%>%
#filter(!(is.na(N1)&is.na(Outlier)))%>%
ggplot(aes(x=Date))+#Data depends on time
geom_point(aes(y=N1,
color="N1",
info = N1),data=NonOuliersDF,size=.5)+#compares N1
geom_point(aes(y=N2,
color="N2",
info = N2),data=NonOuliersDF,size=.5)+#compares N1
geom_point(aes(y=OutlierN1,
color="N1 Outlier",
info = OutlierN1))+
geom_point(aes(y=OutlierN2,
color="N2 Outlier",
info = OutlierN2))+
theme_light()# +
#scale_y_log10()+
#scale_color_manual(values=c("#F8766D","#999999"))
ggplotly(OutLierPlotDF)
#"2021-06-08","2021-10-17","2021-05-02","2021-01-26"
#make shape, not color change
#jittering
IntercepterDF <- FullDF %>%
group_by(Site)%>%
mutate(FudgeFactor = mean(N1))%>%#Mean of sites to see if it works as normalizer
filter(Site != "Madison") #We are looking for agreement between the interceptors
#after some normalization so Madison just distracts
IntercepterOverLay <- IntercepterDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
geom_point(aes(y=N2,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
theme_light() +
scale_y_log10()
ggplotly(IntercepterOverLay)
IntercepterOverLayFlow <- IntercepterDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1*FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
geom_point(aes(y=N2*FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
theme_light() +
scale_y_log10()
ggplotly(IntercepterOverLayFlow)
IntercepterOverLayPop <- IntercepterDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1/Pop,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
geom_point(aes(y=N2/Pop, color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
theme_light() +
scale_y_log10()
ggplotly(IntercepterOverLayPop)
#Get P18 to pop out
TieMethod <- "average"
IntercepterDF%>%
ggplot()+
geom_histogram(aes(x=log(N1)),bins=15)+
facet_wrap(~Site)
IntercepterCoreDF <- IntercepterDF%>%
filter(!is.na(N1))%>%
group_by(Date)%>%
filter(n()==5)
IntercepterCoreDF%>%
group_by(Date)%>%
mutate(Ranking = frank(N1,ties.method = TieMethod))%>%
group_by(Site)%>%
summarize(N1AverageRanking = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
## Site N1AverageRanking
## <chr> <dbl>
## 1 MMSD-P11 2.73
## 2 MMSD-P18 3.87
## 3 MMSD-P2 3.21
## 4 MMSD-P7 2.91
## 5 MMSD-P8 2.28
IntercepterCoreDF%>%
group_by(Date)%>%
mutate(Ranking = frank(N1*FlowRate, ties.method = TieMethod))%>%
group_by(Site)%>%
summarize(N1FlowAverageRanking = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
## Site N1FlowAverageRanking
## <chr> <dbl>
## 1 MMSD-P11 3.26
## 2 MMSD-P18 4.60
## 3 MMSD-P2 2.92
## 4 MMSD-P7 2.02
## 5 MMSD-P8 2.20
IntercepterCoreDF%>%
group_by(Date)%>%
mutate(Ranking = frank(N1/Pop,ties.method = TieMethod))%>%
group_by(Site)%>%
summarize(N1PopAverageRanking = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
## Site N1PopAverageRanking
## <chr> <dbl>
## 1 MMSD-P11 1.45
## 2 MMSD-P18 3.68
## 3 MMSD-P2 3.07
## 4 MMSD-P7 4.25
## 5 MMSD-P8 2.55
IntercepterCoreDF%>%
group_by(Date)%>%
mutate(Ranking = frank(N1, ties.method = TieMethod))%>%
group_by(Site,Ranking)%>%
ggplot()+
geom_histogram(aes(x=Ranking,fill=Site),position = "dodge")
#Redo without P18 || The problamatic months
#Compare results between normalizations
IntercepterChangeDF <- IntercepterDF%>%
filter(!is.na(N1))%>%
mutate(ChangeN1 = lead(N1) - N1,
ChangeN2 = lead(N2) - N2,
PerChangeN1 = log(lead(N1) - N1),
PerChangeN2 = log(lead(N2) - N2))
IntercepterChangeOverLay <- IntercepterChangeDF%>%
ggplot(aes(x=Date))+
geom_point(aes(y = ChangeN1, color = Site), size = SizeUsed,
alpha = alphaUsed, shape=N1ShapeUnit)+
geom_point(aes(y = ChangeN2,color = Site), size = SizeUsed,
alpha = alphaUsed, shape = N2ShapeUnit)+
theme_light()#+
#scale_y_log10()
ggplotly(IntercepterChangeOverLay)
IntercepterPerChangeOverLay <- IntercepterChangeDF%>%
ggplot(aes(x = Date))+
geom_point(aes(y = PerChangeN1, color = Site), size = SizeUsed,
alpha = alphaUsed, shape = N1ShapeUnit)+
geom_point(aes(y = PerChangeN2, color = Site), size = SizeUsed,
alpha= alphaUsed,shape = N2ShapeUnit)+
theme_light()#+
#scale_y_log10()
ggplotly(IntercepterPerChangeOverLay)
LoessFunc <- function(SiteFilter,DF,SpanConstant = .163){
MainDF <- DF%>%
filter(Site==SiteFilter)
MainDF[["loessN1"]] <- loessFit(y=(MainDF[["N1"]]),
x=MainDF$Date, #create loess fit of the data
span=SpanConstant, #span of .2 seems to give the best result, not rigorously chosen
iterations=5)$fitted#2 iterations remove some bad patterns
return(MainDF)
}
SiteLoessDF <- lapply(c("MMSD-P11","MMSD-P18","MMSD-P2","MMSD-P7","MMSD-P8"),
LoessFunc,IntercepterDF,SpanConstant=.2)%>%
bind_rows()
A <- SiteLoessDF%>%
filter(!is.na(loessN1))%>%
ggplot(aes(x=Date))+
geom_point(aes(y=N1,color=Site),data=IntercepterDF,size=.5,alpha=.5)+
geom_line(aes(y=loessN1,color=Site))+
scale_y_log10()
ggplotly(A)
#Facet by normalizations, log/non log, orderered
#landscape 3 by 1
#Thumbnails
#Combine with N2?
#For comparisions we dont need points
Mean <- 11.73
StandardDeviation <- 7.68
Scale = StandardDeviation^2/Mean
Shape = Mean/Scale
SLDWidth <- 21
weights <- dgamma(1:SLDWidth, scale = Scale, shape = Shape)
SiteLoessDF <- FullDF%>%
group_by(Site)%>%
arrange(Site,Date)%>%
filter(Site!="Madison")%>%
mutate(SevDayCases = rollmean(x = FirstConfirmed, 7, fill=0))%>%
arrange(Site,Date)%>%
group_by(Site)#%>%
#mutate(SLDCases = c(rep(NA, SLDWidth-1),
# rollapply(FirstConfirmed, width=SLDWidth, FUN=weighted.mean,
# w=weights,
# na.rm = FALSE)))
A <- SiteLoessDF%>%
filter(!is.na(SevDayCases),
SevDayCases != 0)%>%
ggplot(aes(x=Date))+
geom_point(aes(y = FirstConfirmed, color = Site, shape = Site),
data = IntercepterDF, size = .5, alpha = .5)+
geom_line(aes(y = SevDayCases, color = Site))+
#geom_line(aes(y = SLDCases, color = Site))+
scale_y_log10()
ggplotly(A)
B <- SiteLoessDF%>%
filter(!is.na(SevenDayMACases),
SevDayCases != 0)%>%
ggplot(aes(x=Date))+
geom_point(aes(y = FirstConfirmed / Pop, color = Site, shape = Site),
data = IntercepterDF, size = .5, alpha = .5)+
geom_line(aes(y = SevDayCases / Pop, color = Site))+
#geom_line(aes(y = SLDCases / Pop, color = Site))+
scale_y_log10()
ggplotly(B)